/*******************************************************************************
Project:		Wealth -- Smith, Zidar, and Zwick
Last modified: 	2021-07-06
Modified by:	Dustin Swonder
Description:	This file replicates and updates Financial Accounts aggregates
				used in capitalization by Saez and Zucman. Specifically: 
					- Section I replicates main aggregates from the parameters
					  file SZ 2020 uses for aggregates
					- Section II of the do-file updates the construction with 
					  the latest vintage of USFA aggregates.
				See notes ``sz2020revisionists_parameters_reconstruction.txt'' 
				and ``Memo_Reconstruct_SZ2020Revisionists_Parameters.pdf'' in
				~/Dropbox/wealth/notes/ds_memos.
*******************************************************************************/

/*******************************************************************************
*** (I) SECTION 1: REPLICATE MAIN AGGREGATES FROM PARAMETERS.XLS ***************
*******************************************************************************/

/*******************************************************************************
	(I.1) Load non-USFA files we need in order to replicate and validate SZ 
		Revisionists totals
*******************************************************************************/

	/***************************************************************************
		(I.1.1) Load table 19 from the Investment Company Institute Retirement 
			tables, which is reproduced on sheet TSB5 in the SZ 2020 aggregates
			spreadsheet. Clean up a bit.
	***************************************************************************/

		/**********************************************************************
			(I.1.1.1) Read in data set and get the rows and columns that we 
				want
		**********************************************************************/

qui import excel using $datadir/ICI/ret_20_q3_data.xls, sheet("Table 19") ///
	cellrange(A5:L79) firstrow clear

keep A Bond Hybrid Moneymarket
foreach column of varlist Bond Hybrid Moneymarket {
	assert `column'[1] == "Assets"
	assert `column'[2] == "Billions"
}

drop in 1/2

rename (A Bond Hybrid Moneymarket) (year bond hybrid moneymarket)

* Drop Q1-Q3 for years with quarterly measurements
qui drop if strlen(year) > 4 & !regexm(year, "Q4")
qui replace year = substr(year, 1, 4)

qui destring _all, replace

		/**********************************************************************
			(I.1.1.2) Scale out of billions into millions
		**********************************************************************/

foreach billions of varlist bond hybrid moneymarket {
	qui replace `billions' = `billions' * 1E3
}

		/**********************************************************************
			(I.1.1.3) Get midyear averages, following SZ, then save as tempfile
		**********************************************************************/

expand 2
sort year
by year: gen midyear = cond(_n == 1, year, year + 1)

* Drop years for which we can't get midyear averages
foreach bookend of numlist 1990 2020 {
	qui count if midyear == `bookend'
	assert `r(N)' == 1
}
drop if inlist(midyear, 1990, 2020)

collapse (mean) bond hybrid moneymarket, by(midyear)

rename midyear year

save $dumpdir/tsb5.dta, replace

	/***************************************************************************
		(I.1.2) Need to take for granted two series from SZ's files: 
			1. Municipal bonds adjustment factor to smooth aggregate munis 
				series from 1993-2004
			2. ``scorpinc'' series to get capitalized estimate of S-corporation
				equity before 1996.
	***************************************************************************/

qui import excel using "$litroot/SZ2020Revisionists/PSZ2020AppendixTablesI(Aggreg).xlsx", ///
	sheet("DataWealth") cellrange(A109:EL162) clear

keep A AB EL

rename (A AB EL) (year muni_adjfactor scorpinc_sz)

* Scale from billions into millions in order to match scale of USFA CSVs
qui replace scorpinc_sz = scorpinc * 1E3

save $dumpdir/szcopy.dta, replace

	/***************************************************************************
		(I.1.3) Load the rest of the DataWealth columns we'll re-create in order
			to build main wealth aggregates; we'll merge these onto our 
			aggregates once we've constructed them in order to check that our 
			totals align
	***************************************************************************/

qui import excel using "$litroot/SZ2020Revisionists/PSZ2020AppendixTablesI(Aggreg).xlsx", ///
	sheet("DataWealth") cellrange(A88:FN162) clear

keep A AC AE AF AG AH AI AK AL AP AQ AS AU AV AW AX AZ BA BB BD BE BI BJ BK BO BS ///
	 BU BZ CB CC CH DU DX EB EC FE FF FI FJ FK FL FM FN G I J K L O P S T X Y Z

rename * *_sz
rename A_sz year

tempfile szdatawealth
save `szdatawealth'

	/***************************************************************************
		(I.1.4) Load SZ revisionists vintage of parameters file used for 
			capitalization totals
	***************************************************************************/

qui import excel using "$litroot/SZ2020Revisionists/PSZ2020Programs/parameters.xlsx", firstrow clear

keep yr ttwealth tt*w ttcurrency ttothdebt ttmmbondfund
drop ttschcw ttpartw // Don't use these in our wealth build, as far as I can tell

rename (yr tt*) (year tt*_sz)
drop if missing(year)

tempfile parameters
save `parameters'

/*******************************************************************************
	(I.2) Pull USFA series we need in order to recreate SZ 2020 revisionists 
		main portfolio categories. According to note at the top of DataWealth
		sheet, data were downloaded June 2020, which was the 2020Q1 release of
		USFA. However, the aggregates actually match the 2020Q2 vintage.
*******************************************************************************/

foreach filename in l108 l117 l121 l122 l218 l219 l221 l223 l227 b101 b101n b101e ///
	b104 all_sectors_levels_a {

	qui import delim using "$datadir/financial_accounts/z1_csv_files_2020Q2/csv/`filename'.csv", ///
			clear varnames(1)

	capture rename fa*, upper
	capture rename fl*, upper
	capture rename lm*, upper
	capture rename *A *
	capture rename *Q *

	rename date year

	capture confirm string variable year
	if _rc == 0 {
		qui drop if !regexm(year, ":Q4") // Want year-end values for quarterly series
		qui replace year = substr(year, 1, 4) 
	}
	qui destring _all, replace ignore("ND")

	if "`filename'" != "l108" {
		qui merge 1:1 year using $dumpdir/merged_usfa_parameters_components.dta, nogen update
	}
	qui save $dumpdir/merged_usfa_parameters_components.dta, replace
}

/* Keep the series we need. I collect these codes by hand and describe them in 
	sz2020revisionists_parameters_reconstruction.txt */
#delimit ;
keep year LM155035015 LM115035023 FL153165105 FL113165105 FL113165405 FL233165605
	LM153064105 LM653064155 LM163064005 LM543064153 FL893131573 LM543131503
	LM593064105 LM593064205 LM653064100 LM653164205 FL593073005 FL594090005 
	FL153030005 FL153091003 FL163030205 LM153061105 LM153061705 LM163061005 
	LM153063005 LM163063005 FL154023005 LM653062003 LM153064205 FL153034005 
	FL634090005 FL633062000 FL153020005 FL163020005 LM153062005 FL592000075 
	FL594090055 FL153040005 FL543150005 FL153166000 FL153168005 FL153169005 
	FL543077073 LM152090205 LM654022055 LM654022005 LM883164133 FL154090005
	LM154022005 FL583150005 FL163070005 FL163034003;
#delimit cr

sort year

/*******************************************************************************
	(I.4) Rename/construct Financial Accounts series in terms of ``third/base-
		layer'' DataWealth columns, as designated in the note.
*******************************************************************************/

#delimit ;
rename (LM155035015 LM115035023 FL153165105 FL113165105 FL113165405 FL233165605 
		LM153064105 LM653064155 LM163064005 LM543064153 FL893131573 LM543131503 
		LM593064105 LM593064205 LM653064100 LM653164205 FL593073005 FL594090005 
		FL153030005 FL153091003 FL163030205 LM153061105 LM153061705 LM163061005 
		LM153063005 LM163063005 FL154023005 LM653062003 LM153064205 FL153034005 
		FL634090005 FL633062000 FL153020005 FL163020005 LM153062005 FL592000075 
		FL594090055 FL153040005 FL543150005 FL153166000 FL153168005 FL153169005
		FL543077073 LM152090205 LM654022055 LM654022005 LM883164133 FL154090005
		LM154022005 FL583150005 FL163070005 FL163034003)
	(BD AK BE AL_1 AL_2 AL_3 J K FE L DU DX CB CC AW AV BZ CH AE AF FL X_1 X_2 FI_FJ 
	 Y FK AS AZ AU O BA BB AG FN AA BS BU AP BO BJ BK_1 BK_2 BK_3 AH T AX EK G S AQ 
	 FM FF);
#delimit cr

qui gen AL = AL_1 + AL_2 + AL_3
qui gen BK = BK_1 + BK_2 + BK_3
qui gen X = X_1 + X_2

drop *_1 *_2 *_3

/*******************************************************************************
	(I.5) Preliminary cleanup, before we construct ``second-layer'' DataWealth 
		columns.
*******************************************************************************/

	/***************************************************************************
		(I.5.1) Deal with missing values following SZ 2020
	***************************************************************************/

foreach missingbefore1981 of varlist DU DX {
	assert missing(`missingbefore1981') if year < 1981
	qui replace `missingbefore1981' = 0 if year < 1981
	assert !missing(`missingbefore1981')	
}

assert missing(AF) if year == 1945
qui replace AF = 0 if year == 1945

assert !missing(DU) & !missing(DX) & !missing(AF)
 
	/***************************************************************************
		(I.5.2) Extrapolate backwards series that don't extend back to 1945
	***************************************************************************/

qui gen I = J + K
assert !missing(I)

qui gen FI = 0.3 * FI_FJ
qui gen FJ = 0.7 * FI_FJ
assert !missing(FI) & !missing(FJ) if year > 1986
drop FI_FJ

qui sort year
qui count if year <= 1986
local row1986 = `r(N)'
assert year[`row1986'] == 1986

forv row = `row1986'(-1)1 {
	local prevyr = `row' + 1

	assert missing(FE[`row']) & missing(FL[`row']) & missing(FJ[`row']) & ///
		missing(FI[`row']) & missing(FN[`row']) & missing(FK[`row'])

	qui replace FE = FE[`prevyr'] * (I + L + CB) / (I[`prevyr'] + L[`prevyr'] + CB[`prevyr']) in `row'
	qui replace FL = FL[`prevyr'] * ((AE + AF) / (AE[`prevyr'] + AF[`prevyr'])) in `row'
	qui replace FJ = FJ[`prevyr'] * (X / X[`prevyr']) in `row'
	qui replace FI = FI[`prevyr'] * (AA / AA[`prevyr']) in `row'
	qui replace FN = FN[`prevyr'] * (AG / AG[`prevyr']) in `row'
	qui replace FK = FK[`prevyr'] * (Y / Y[`prevyr']) in `row'
}

assert !missing(FE) & !missing(FL) & !missing(FJ) & !missing(FI) & !missing(FN) & !missing(FK)

	/***************************************************************************
		(I.5.3) Apply municipal bond adjustment factor from SZ to municipal 
			bonds estimates from 1993 to 2004 
	***************************************************************************/

merge 1:1 year using $dumpdir/szcopy.dta, assert(1 3) keepusing(muni_adjfactor) nogen

qui gen Z = cond(inrange(year, 1993, 2003), AA * muni_adjfactor, AA)
assert !missing(Z)

drop muni_adjfactor

	/***************************************************************************
		(I.5.4) Add difference between adjusted and ``official'' municipal
			bonds concepts from official bonds concept in column S
	***************************************************************************/

qui replace S = S + (Z - AA)

/*******************************************************************************
	(I.6) Construct full set of ``second-layer'' DataWealth aggregates as 
		described in note, and ensure that these match the series in the 
		DataWealth sheet.
*******************************************************************************/

	/***************************************************************************
		(I.6.1) Construct remaining ``second-layer'' DataWealth columns
	***************************************************************************/

qui gen EB = (DU - DX) * (CB + (CC * (AW / AV))) / (CH - BZ)
qui gen EC = (DU - DX) * ((CH - BZ - CB - CC) + CC * (AV - AW) / AV) / (CH - BZ)
qui gen AC = AZ * AU / AV
qui gen P = cond(BA > 0, BB * O / BA, 0) // Set to zero when BA == 0, following SZ
qui gen BI = BJ + BK
qui gen AI = AH - (AK - AL)

	/***************************************************************************
		(I.6.2) Merge on SZ versions of DataWealth columns and cross-validate 
			(SZ columns are down-scaled by a factor of 1E3)
	***************************************************************************/

qui merge 1:1 year using `szdatawealth', assert(3) nogen

foreach checkcol in AC AE AF AG AH AI AK AL AP AQ AS AU AV AW AX AZ BA BB BD BE BI ///
	BJ BK BO BS BU BZ CB CC CH DU DX EB EC FE FF FI FJ FK FL FM FN G I J K L O P S ///
	T X Y Z {

	assert inrange(`checkcol' / (`checkcol'_sz * 1E3), 0.99, 1.01) | ///
		`checkcol' / 1E3 == `checkcol'_sz
}
drop *_sz

/*******************************************************************************
	(I.7) Take mid-year averages of USFA columns, following SZ
*******************************************************************************/

expand 2
sort year
by year: gen midyear = cond(_n == 1, year, year + 1)

* Drop years for which we can't get midyear averages
foreach bookend of numlist 1945 2020 {
	qui count if midyear == `bookend'
	assert `r(N)' == 1
}
qui drop if inlist(midyear, 1945, 2020)

#delimit ;
collapse (mean) AC AE AF AG AH AI AK AL AP AQ AS AU AV AW AX AZ BA BB BD BE BI BJ
	BK BO BS BU BZ CB CC CH DU DX EB EC EK FE FF FI FJ FK FL FM FN G I J K L O P 
	S T X Y Z, by(midyear);
#delimit cr

rename midyear year

/*******************************************************************************
	(I.8) Merge on scorpinc series from SZ small files and use capitalization 
		with average 1997-2011 rate of return on S-corp equity
*******************************************************************************/

	/***************************************************************************
		(I.8.1) Merge on scorpinc, copied directly from SZ; these are.aggregates
			from their small`yr'.dta files. Ensure not missing for years after
			1965
	***************************************************************************/

qui merge 1:1 year using $dumpdir/szcopy.dta, assert(1 3) keepusing(scorpinc_sz) nogen

assert !missing(scorpinc_sz) if year > 1965

	/***************************************************************************
		(I.8.2) Use scorpinc to compute aggregate return on equity for years
			that we have it
	***************************************************************************/

qui gen EM = (scorpinc_sz / EK) // This is return on equity
assert inrange(EM, 0, 1) if !missing(EM)

	/***************************************************************************
		(I.8.3) Get average return on S-corporation equity from 1997-2011 and
			assume this average rate from 1966 to 1996. Then use this to get 
			capitalized totals from 1966 to 1996.
	***************************************************************************/

summ EM if inrange(year, 1997, 2011), meanonly
qui replace EM = `r(mean)' if inrange(year, 1966, 1996)

qui replace EK = scorpinc_sz / EM if inrange(year, 1966, 1996)

assert !missing(EK)
drop scorpinc_sz EM

/*******************************************************************************
	(I.9) Merge on ICI and create key columns in second layer
*******************************************************************************/

qui merge 1:1 year using $dumpdir/tsb5.dta, assert(1 3)
assert _merge == 3 if year >= 1991
assert _merge == 1 if year < 1991
drop _merge

rename (moneymarket EC) (S_TSB5 O_TSB5)
qui gen R_TSB5 = bond + 0.3 * hybrid
qui gen J_TSB5 = T - AC + (AV - AW - AX) * (AU / AV)

assert !missing(J_TSB5) & !missing(O_TSB5)
assert missing(R_TSB5) & missing(S_TSB5) if year < 1991

/* Extrapolate R_TSB5 and S_TSB5 backwards for 1981-1990; set as zero before that,  
	following SZ */
qui replace R_TSB5 = cond(year <= 1980, 0, 0.02 * (year - 1980) * J_TSB5) if year < 1991

foreach year of numlist 1979 1990 {
	sort year 
	qui count if year <= `year'
	local row`year' = `r(N)'
	assert year[`row`year''] == `year'
}

forv row = `row1990'(-1)`row1979' {
	local prevyr = `row' + 1

	assert missing(S_TSB5[`row'])

	qui replace S_TSB5 = S_TSB5[`prevyr'] * (O_TSB5 / O_TSB5[`prevyr']) in `row'
}

assert missing(S_TSB5) if year < 1980
qui replace S_TSB5 = 0 if year < 1980

assert !missing(O_TSB5) & !missing(R_TSB5) & !missing(S_TSB5)

/*******************************************************************************
	(I.10) Construct full set of ``third-layer'' DataWealth aggregates as 
		described in note, and check against parameters.xlsx
*******************************************************************************/

	/***************************************************************************
		(I.10.1) Create parameters wealth concepts
	***************************************************************************/

qui gen ttrentw = AK - AL

assert BA == 0 if year < 1974
assert BA > 0 if year > 1973

/* Bonds and loans held in mutual funds minus IRA holdings of bonds and loans in 
	mutual funds plus (money market fund shares minus munis held through money 
	market fund shares minus money market fund shares of nonprofits) minus 
	money market funds held in IRAs */
qui gen ttmmbondfund = cond(BA > 0, J_TSB5 - R_TSB5 + (O - (BB * O / BA) - FF) - S_TSB5, ///
							J_TSB5 - R_TSB5 - S_TSB5)
assert !missing(ttmmbondfund)

qui gen ttmiscw = G - J - O - S - AE - AF - AG - AH - AU - AP - AQ - AS - FM

qui gen ttdivw = I - FE - EB - EK

qui gen ttinttaxw = (AE + AF - FL) + (X - FJ) + (Y - FK) - (O_TSB5 - R_TSB5 - S_TSB5) + AS

qui gen ttintexmw = Z - FI + AC + P

qui gen ttcurrency = AG - FN

qui gen ttpenw = BS + BU + AP + BO

qui gen ttpeniraw = DU - DX

rename (BE AL BI AI EK BD) (ttmortw ttrentmortw ttothdebt ttschcpartw ttscorw ttrestw)

foreach mortcncpt of varlist ttmortw ttrentmortw ttothdebt {
	assert `mortcncpt' > 0
	replace `mortcncpt' = -1 * `mortcncpt'
}

save $dumpdir/parameters_sz2020revisionists_with_datawealth_columns.dta, replace

keep year tt*
qui ds tt*
local parameterscncpts = "`r(varlist)'"
	
	/***************************************************************************
		(I.10.2) Merge on SZ versions of these concepts and check against what
			I've constructed
	***************************************************************************/

merge 1:1 year using `parameters', assert(2 3) keep(3) nogen

foreach parameterscncpt in `parameterscncpts' {
	assert inrange(`parameterscncpt' / `parameterscncpt'_sz, 0.999, 1.001) | ///
		`parameterscncpt' == `parameterscncpt'_sz
}

drop *_sz

export delimited using $dumpdir/parameters_sz2020revisionists_wealth.csv, replace

/*******************************************************************************
*** (II) SECTION 2: UPDATED AGGREGATES USING MOST RECENT RELEASE ***************
*******************************************************************************/

/*******************************************************************************
	(II.0) Pull auxiliary aggregates for credit card scaling exercise (not 
		included in SZ parameters file)
*******************************************************************************/

	/***************************************************************************
		(II.0.1) Load Financial Accounts data and get two relevant consumer 
			credit components: credit card liabilities and auto loans. Compute 
			midyear averages (following SZ).
	***************************************************************************/

insheet using $datadir/financial_accounts/z1_csv_files_2020Q3/csv/l222.csv, names clear

drop if !regexm(date, "Q4")
isid date

gen year = substr(date, 1, 4)

destring year fl153166100 fl153166400, replace ignore("ND")

assert !missing(year)

rename (fl153166100 fl153166400) (ccbal_usfa autoloans_usfa)

keep if inrange(year, 1945, 2019)

expand 2
sort year
by year: gen midyear = cond(_n == 1, year, year + 1)

* Drop years for which we can't get midyear averages
foreach bookend of numlist 1945 2020 {
	qui count if midyear == `bookend'
	assert `r(N)' == 1
}
drop if inlist(midyear, 1945, 2020)

collapse (mean) *_usfa, by(midyear)

rename midyear year

tempfile usfa_midyear
save `usfa_midyear'

	/***************************************************************************
		(II.0.2) Useful aggregates in the SCF, including credit card balances, 
			private business, and deposits
	***************************************************************************/

use $dtadir/scf_revision.dta, clear

gen schcpartw = pthru - scorw

keep year wgt1B ccbal deposits schcpartw scorw

collapse (sum) ccbal_scf = ccbal ttdeposits_scf = deposits ttschcpartw_scf = schcpartw ///
	ttscorw_scf = scorw [fw = wgt1B], by(year)

/* Undo billion-scaling from collapse, then scale down further into millions to 
	match Financial Accounts */
foreach unscaled of varlist *_scf { 
	replace `unscaled' = `unscaled' / (1E9 * 1E6)
}

tempfile scf
save `scf'

	/***************************************************************************
		(II.0.3) Calculate credit card total scaling down to match SCF
			1. Calculate ratio between credit card balance concepts. 
			2. Interpolate values for between-years in SCF. 
			3. Assign years before 1989 average ratio (ratio is neither growing
				nor declining over time, so this seems defensible).
			4. Calculate SCF-scaled credit card balance (non convenience 
				balances) in USFA
	***************************************************************************/

use `usfa_midyear', clear
merge 1:1 year using `scf', assert(1 3) keepusing(ccbal_scf)
assert _merge == 3 if inrange(year, 1989, 2019) & mod(year - 1989, 3) == 0
assert !inrange(year, 1989, 2019) | mod(year - 1989, 3) > 0 if _merge == 1
drop _merge

gen scf_usfa_ratio_ccbal = ccbal_scf / ccbal_usfa

* Interpolate values in non-SCF years from 1989-2019 linearly
ipolate scf_usfa_ratio_ccbal year, gen(scf_usfa_ratio_ccbal_iplt)

assert !missing(scf_usfa_ratio_ccbal_iplt) if year > 1988
assert missing(scf_usfa_ratio_ccbal) if year < 1989

* Fill in values for pre-1989 years as average over pre-crisis SCF years
qui summ scf_usfa_ratio_ccbal if year < 2007, detail
replace scf_usfa_ratio_ccbal_iplt = `r(mean)' if missing(scf_usfa_ratio_ccbal_iplt)

gen ccbal_scfscale = scf_usfa_ratio_ccbal_iplt * ccbal_usfa // Create adjusted concept

keep year ccbal_scfscale ccbal_usfa

tempfile ccbal_scfscale
save `ccbal_scfscale'

/*******************************************************************************
	(II.1) Pull USFA series we need in order to recreate SZ 2020 revisionists 
		main portfolio categories. According to note at the top of DataWealth
		sheet, data were downloaded June 2020, which was the 2020Q1 release of
		USFA. However, the aggregates actually match the 2020Q2 vintage. 
		
		~Additionally~ we'll pull total DB pensions from L.117 (FL594190045), 
		because we'll want to make a ttpenw concept which uses more than just
		the funded portion of DB.
*******************************************************************************/

foreach filename in l108 l117 l121 l122 l218 l219 l221 l223 l227 b101 b101n b101e ///
	b104 all_sectors_levels_a {

	import delim "$datadir/financial_accounts/z1_csv_files_2020Q3/csv/`filename'.csv", ///
			clear varnames(1)

	capture rename fa*, upper
	capture rename fl*, upper
	capture rename lm*, upper
	capture rename *A *
	capture rename *Q *

	rename date year

	capture confirm string variable year
	if _rc == 0 {
		qui drop if !regexm(year, ":Q4") // Want year-end values for quarterly series
		qui replace year = substr(year, 1, 4) 
	}
	qui destring _all, replace ignore("ND")

	if "`filename'" != "l108" {
		qui merge 1:1 year using $dumpdir/merged_usfa_parameters_components.dta, nogen update
	}
	qui save $dumpdir/merged_usfa_parameters_components.dta, replace
}

/* Keep the series we need. I collect these codes by hand and describe them in 
	sz2020revisionists_parameters_reconstruction.txt */
#delimit ;
keep year LM155035015 LM115035023 FL153165105 FL113165105 FL113165405 FL233165605
	LM153064105 LM653064155 LM163064005 LM543064153 FL893131573 LM543131503
	LM593064105 LM593064205 LM653064100 LM653164205 FL593073005 FL594090005 
	FL153030005 FL153091003 FL163030205 LM153061105 LM153061705 LM163061005 
	LM153063005 LM163063005 FL154023005 LM653062003 LM153064205 FL153034005 
	FL634090005 FL633062000 FL153020005 FL163020005 LM153062005 FL592000075 
	FL594090055 FL153040005 FL543150005 FL153166000 FL153168005 FL153169005 
	FL543077073 LM152090205 LM654022055 LM654022005 LM883164133 FL154090005
	LM154022005 FL583150005 FL163070005 FL163034003 FL594190045;
#delimit cr

sort year

/*******************************************************************************
	(II.2) Rename/construct Financial Accounts series in terms of ``third/base-
		layer'' DataWealth columns, as designated in the note.
*******************************************************************************/

#delimit ;
rename (LM155035015 LM115035023 FL153165105 FL113165105 FL113165405 FL233165605 
		LM153064105 LM653064155 LM163064005 LM543064153 FL893131573 LM543131503 
		LM593064105 LM593064205 LM653064100 LM653164205 FL593073005 FL594090005 
		FL153030005 FL153091003 FL163030205 LM153061105 LM153061705 LM163061005 
		LM153063005 LM163063005 FL154023005 LM653062003 LM153064205 FL153034005 
		FL634090005 FL633062000 FL153020005 FL163020005 LM153062005 FL592000075 
		FL594090055 FL153040005 FL543150005 FL153166000 FL153168005 FL153169005
		FL543077073 LM152090205 LM654022055 LM654022005 LM883164133 FL154090005
		LM154022005 FL583150005 FL163070005 FL163034003 FL594190045)
	(BD AK BE AL_1 AL_2 AL_3 J K FE L DU DX CB CC AW AV BZ CH AE AF FL X_1 X_2 FI_FJ 
	 Y FK AS AZ AU O BA BB AG FN AA BS BU AP BO BJ BK_1 BK_2 BK_3 AH T AX EK G S AQ 
	 FM FF dbtotal);
#delimit cr

qui gen AL = AL_1 + AL_2 + AL_3
qui gen BK = BK_1 + BK_2 + BK_3
qui gen X = X_1 + X_2

drop *_1 *_2 *_3

/*******************************************************************************
	(II.3) Preliminary cleanup, before we construct ``second-layer'' DataWealth 
		columns.
*******************************************************************************/

	/***************************************************************************
		(II.3.1) Deal with missing values following SZ 2020
	***************************************************************************/

foreach missingbefore1981 of varlist DU DX {
	assert missing(`missingbefore1981') if year < 1981
	qui replace `missingbefore1981' = 0 if year < 1981
	assert !missing(`missingbefore1981')	
}

assert missing(AF) if year == 1945
qui replace AF = 0 if year == 1945

assert !missing(DU) & !missing(DX) & !missing(AF)
 
	/***************************************************************************
		(II.3.2) Extrapolate backwards series that don't extend back to 1945
	***************************************************************************/

qui gen I = J + K
assert !missing(I)

qui gen FI = 0.3 * FI_FJ
qui gen FJ = 0.7 * FI_FJ
assert !missing(FI) & !missing(FJ) if year > 1986
drop FI_FJ

qui sort year
qui count if year <= 1986
local row1986 = `r(N)'
assert year[`row1986'] == 1986

forv row = `row1986'(-1)1 {
	local prevyr = `row' + 1

	assert missing(FE[`row']) & missing(FL[`row']) & missing(FJ[`row']) & ///
		missing(FI[`row']) & missing(FN[`row']) & missing(FK[`row'])

	qui replace FE = FE[`prevyr'] * (I + L + CB) / (I[`prevyr'] + L[`prevyr'] + CB[`prevyr']) in `row'
	qui replace FL = FL[`prevyr'] * ((AE + AF) / (AE[`prevyr'] + AF[`prevyr'])) in `row'
	qui replace FJ = FJ[`prevyr'] * (X / X[`prevyr']) in `row'
	qui replace FI = FI[`prevyr'] * (AA / AA[`prevyr']) in `row'
	qui replace FN = FN[`prevyr'] * (AG / AG[`prevyr']) in `row'
	qui replace FK = FK[`prevyr'] * (Y / Y[`prevyr']) in `row'
}

assert !missing(FE) & !missing(FL) & !missing(FJ) & !missing(FI) & !missing(FN) & !missing(FK)

	/***************************************************************************
		(II.3.3) Apply municipal bond adjustment factor from SZ to municipal 
			bonds estimates from 1993 to 2004 
	***************************************************************************/

merge 1:1 year using $dumpdir/szcopy.dta, assert(1 3) keepusing(muni_adjfactor) nogen

qui gen Z = cond(inrange(year, 1993, 2003), AA * muni_adjfactor, AA)
assert !missing(Z)

drop muni_adjfactor

	/***************************************************************************
		(II.3.4) Add difference between adjusted and ``official'' municipal
			bonds concepts from official bonds concept in column S
	***************************************************************************/

qui replace S = S + (Z - AA)

/*******************************************************************************
	(II.4) Construct remaining ``second-layer'' DataWealth columns
*******************************************************************************/

qui gen EB = (DU - DX) * (CB + (CC * (AW / AV))) / (CH - BZ)
qui gen EC = (DU - DX) * ((CH - BZ - CB - CC) + CC * (AV - AW) / AV) / (CH - BZ)
qui gen AC = AZ * AU / AV
qui gen P = cond(BA > 0, BB * O / BA, 0) // Set to zero when BA == 0, following SZ
qui gen BI = BJ + BK
qui gen AI = AH - (AK - AL)

/*******************************************************************************
	(II.5) Take mid-year averages of USFA columns, following SZ
*******************************************************************************/

expand 2
sort year
by year: gen midyear = cond(_n == 1, year, year + 1)

* Drop years for which we can't get midyear averages
foreach bookend of numlist 1945 2020 {
	qui count if midyear == `bookend'
	assert `r(N)' == 1
}
qui drop if inlist(midyear, 1945, 2020)

#delimit ;
collapse (mean) AC AE AF AG AH AI AK AL AP AQ AS AU AV AW AX AZ BA BB BD BE BI BJ
	BK BO BS BU BZ CB CC CH DU DX EB EC EK FE FF FI FJ FK FL FM FN G I J K L O P 
	S T X Y Z dbtotal, by(midyear);
#delimit cr

rename midyear year

/*******************************************************************************
	(II.6) Merge on scorpinc series from SZ small files and use capitalization 
		with average 1997-2011 rate of return on S-corp equity
*******************************************************************************/

	/***************************************************************************
		(II.6.1) Merge on scorpinc, copied directly from SZ; these are.aggregates
			from their small`yr'.dta files. Ensure not missing for years after
			1965
	***************************************************************************/

qui merge 1:1 year using $dumpdir/szcopy.dta, assert(1 3) keepusing(scorpinc_sz) nogen

assert !missing(scorpinc_sz) if year > 1965

	/***************************************************************************
		(II.6.2) Use scorpinc to compute aggregate return on equity for years
			that we have it
	***************************************************************************/

qui gen EM = (scorpinc_sz / EK) // This is return on equity
assert inrange(EM, 0, 1) if !missing(EM)

	/***************************************************************************
		(II.6.3) Get average return on S-corporation equity from 1997-2011 and
			assume this average rate from 1966 to 1996. Then use this to get 
			capitalized totals from 1966 to 1996.
	***************************************************************************/

summ EM if inrange(year, 1997, 2011), meanonly
qui replace EM = `r(mean)' if inrange(year, 1966, 1996)

qui replace EK = scorpinc_sz / EM if inrange(year, 1966, 1996)

assert !missing(EK)
drop scorpinc_sz EM

/*******************************************************************************
	(II.7) Merge on ICI and create key columns in second layer
*******************************************************************************/

qui merge 1:1 year using $dumpdir/tsb5.dta, assert(1 3)
assert _merge == 3 if year >= 1991
assert _merge == 1 if year < 1991
drop _merge

rename (moneymarket EC) (S_TSB5 O_TSB5)
qui gen R_TSB5 = bond + 0.3 * hybrid
qui gen J_TSB5 = T - AC + (AV - AW - AX) * (AU / AV)

assert !missing(J_TSB5) & !missing(O_TSB5)
assert missing(R_TSB5) & missing(S_TSB5) if year < 1991

/* Extrapolate R_TSB5 and S_TSB5 backwards for 1981-1990; set as zero before that,  
	following SZ */
qui replace R_TSB5 = cond(year <= 1980, 0, 0.02 * (year - 1980) * J_TSB5) if year < 1991

foreach year of numlist 1979 1990 {
	sort year 
	qui count if year <= `year'
	local row`year' = `r(N)'
	assert year[`row`year''] == `year'
}

forv row = `row1990'(-1)`row1979' {
	local prevyr = `row' + 1

	assert missing(S_TSB5[`row'])

	qui replace S_TSB5 = S_TSB5[`prevyr'] * (O_TSB5 / O_TSB5[`prevyr']) in `row'
}

assert missing(S_TSB5) if year < 1980
qui replace S_TSB5 = 0 if year < 1980

assert !missing(O_TSB5) & !missing(R_TSB5) & !missing(S_TSB5)

/*******************************************************************************
	(II.8) Construct full set of ``third-layer'' DataWealth aggregates as 
		described in note
*******************************************************************************/

qui gen ttrentw = AK - AL

assert BA == 0 if year < 1974
assert BA > 0 if year > 1973
qui gen ttmmbondfund = cond(BA > 0, J_TSB5 - R_TSB5 + (O - (BB * O / BA) - FF) - S_TSB5, ///
							J_TSB5 - R_TSB5 - S_TSB5)
assert !missing(ttmmbondfund)

qui gen ttmiscw = G - J - O - S - AE - AF - AG - AH - AU - AP - AQ - AS - FM

qui gen ttdivw = I - FE - EB - EK

qui gen ttdeposits = AE + AF - FL // Match B.101.h concept from 1987-forward
qui gen ttinttaxw = ttdeposits + (X - FJ) + (Y - FK) - (O_TSB5 - R_TSB5 - S_TSB5) + AS

qui gen ttintexmw = Z - FI + AC + P

qui gen ttcurrency = AG - FN

qui gen ttpenw = BS + BU + AP + BO

qui gen ttpenw_incl_unfunded_db = dbtotal + BU + AP + BO

qui gen ttpeniraw = DU - DX

rename (BE AL BI AI EK BD) (ttmortw ttrentmortw ttothdebt ttschcpartw ttscorw ttrestw)

foreach mortcncpt of varlist ttmortw ttrentmortw ttothdebt {
	assert sign(`mortcncpt') == 1
	replace `mortcncpt' = -1 * `mortcncpt'
}
	
/*******************************************************************************
	(II.9) Merge on SCF aggregates for S-corporations, schedule C and 
		partnership businesses, and deposits; create adjusted measures of 
		ttscorw, ttschcpartw, and ttinttaxw
*******************************************************************************/

merge 1:1 year using `scf', assert(1 3) keepusing(ttschcpartw_scf ttscorw_scf ttdeposits_scf)
assert _merge == 3 if inrange(year, 1989, 2019) & mod(year - 1989, 3) == 0
assert !inrange(year, 1989, 2019) | mod(year - 1989, 3) > 0 if _merge == 1
drop _merge

foreach scalevar of varlist ttschcpartw ttscorw ttdeposits {
	gen scf_usfa_ratio_`scalevar' = `scalevar'_scf / `scalevar'

	* Interpolate values in non-SCF years from 1989-2019 linearly
	ipolate scf_usfa_ratio_`scalevar' year, gen(scf_usfa_ratio_`scalevar'_iplt)

	assert !missing(scf_usfa_ratio_`scalevar'_iplt) if year > 1988
	assert missing(scf_usfa_ratio_`scalevar') if year < 1989

	* Fill in values for pre-1989 years as average over pre-crisis SCF years
	qui summ scf_usfa_ratio_`scalevar' if year < 2007, detail
	replace scf_usfa_ratio_`scalevar'_iplt = `r(mean)' if missing(scf_usfa_ratio_`scalevar'_iplt)

	gen `scalevar'_scfscale = scf_usfa_ratio_`scalevar'_iplt * `scalevar' // Create adjusted concept
}

gen ttinttaxw_scfscale_deposits = ttinttaxw - ttdeposits + ttdeposits_scfscale

/*******************************************************************************
	(II.10) Merge on SCF-scaled credit card debt from II.0 and create an adjusted
		ttothdebt measure
*******************************************************************************/

merge 1:1 year using `ccbal_scfscale', assert(3) nogen
merge 1:1 year using `usfa_midyear', keepusing(autoloans_usfa) assert(3) nogen

assert sign(ttothdebt) == -1

foreach posliabvar of varlist ccbal_usfa ccbal_scf autoloans_usfa {
	assert sign(`posliabvar') == 1 | `posliabvar' == 0
	replace `posliabvar' = -1 * `posliabvar'
}

gen ttothdebt_scfscale_creditcards = ttothdebt - ccbal_usfa + ccbal_scf
gen ttothdebt_excl_autoloans = ttothdebt - autoloans_usfa

gen ttothdebt_scfscalecc_exclautolns = ttothdebt - ccbal_usfa + ccbal_scf - autoloans_usfa

/*******************************************************************************
	(II.11) Disaggregate the ttmmbondfund concept into parts that were formerly
		ttinttaxw vs. formerly ttdivw
*******************************************************************************/

/* Conceptually, this is Bonds and loans held in mutual funds, minus IRAs 
	invested in fixed income mutual funds and IRAs invested in money market 
	mutual funds*/
gen ttmmbondfund_formerinttaxw = J_TSB5 - R_TSB5 - S_TSB5
assert !missing(ttmmbondfund_formerinttaxw)

assert BA == 0 if year < 1974
assert BA > 0 if year > 1973

/* Conceptually, this is Money market fund shares not invested in munis, or more
	precisely Total money market fund shares minus Total money market fund shares
	multiplied by Munis held by money market funds divided by Total money market 
	fund shares liabilities */
gen ttmmbondfund_formerdivw = cond(year > 1973, (O - (BB * O / BA) - FF), 0)
assert !missing(ttmmbondfund_formerdivw)

gen ttmmbondfund_check = ttmmbondfund_formerinttaxw + ttmmbondfund_formerdivw
assert inrange(ttmmbondfund / ttmmbondfund_check, 0.999999, 1.000001)
drop ttmmbondfund_check

keep year tt* ccbal_* autoloans_usfa

/*******************************************************************************
	(II.12) Sort and save
*******************************************************************************/

sort year

export delimited using $outdir/parameters_wealth_expanded.csv, replace